
################################################################################# 
#                     Load Packages                                            #
################################################################################# 

packages = c("brglm","ggcorrplot","interplot", "stargazer", "tidyverse",
             "here","haven","mice","modelsummary", "sandwich", "lmtest",
             "MASS", "ROCR", "generalhoslem", "brant", "gridExtra",
             "erer", "margins")
#Warning! This will install R packages in your system if not yet installed. 
#load or install & load all packages 
package.check <- lapply(
  packages,
  FUN = function(x) {
    if (!require(x, character.only = TRUE)) {
      install.packages(x, dependencies = TRUE)
      library(x, character.only = TRUE)
    }
  }
)

################################################################################# 
#                     Prepare Data                                             #
################################################################################# 

#####Inport Data ######
VDem <- read.csv(here::here("Vdem","V-Dem-CY-Full+Others-v12.csv"))
LEDs<- read_dta(here::here("CochranLong2017ReplicationData","ReiterStamISReplicationAugust2016.dta"))

#####Clean VDem Data
vdem_clean <- VDem  %>% subset(select = c(country_name,
                                          COWcode,
                                          year,
                                          v2x_corr,
                                          e_gdppc,
                                          v2x_ex_military,
                                          v2x_libdem,
                                          v2x_polyarchy,
                                          v2regsupgroups_6)) 

vdem_clean$pre_war_year <-vdem_clean$year -1 
vdem_clean$COWcode <- ifelse(vdem_clean$country_name == "Austria", 300, vdem_clean$COWcode )
vdem_clean$COWcode <- ifelse(vdem_clean$country_name == "Piedmont-Sardinia", 325, vdem_clean$COWcode )


growth_rate <- function(x)(x/lag(x)-1)*100 
vdem_clean <- vdem_clean %>% group_by(COWcode) %>% mutate(gdppc_growth = growth_rate(e_gdppc)) 

#####Clean CochranLong2017 (LED) data
LEDs2 <- LEDs %>% subset(select = c(RSwarnumber,
                                    name,
                                    year,
                                    ccode,
                                    actor_na,
                                    wl,
                                    wdldownes,
                                    init,
                                    target,
                                    initally, 
                                    targally,
                                    terrain,
                                    strat1,
                                    strat2,
                                    strat3,
                                    strat4,
                                    strat5,
                                    strat,
                                    straterr,
                                    concap,
                                    capasst,
                                    qualrat,
                                    polini,
                                    poltarg,
                                    politics,
                                    pol21,
                                    pol21initally,
                                    pol21targally,
                                    pcini1old,
                                    pcini2old,
                                    logLER))

LEDs2$pre_war_year <-LEDs$year - 1
LEDs2 <- LEDs2 %>% dplyr::rename(COWcode = ccode)
LEDs2 <- LEDs2 %>% group_by(RSwarnumber) %>% fill(year, .direction = "downup") %>% drop_na(RSwarnumber)

#####Merge######
LEDs_data <- left_join(LEDs2,vdem_clean, by = c("COWcode", "pre_war_year"))
LEDs_data <- LEDs_data %>% subset(select = c(-year.y))
LEDs_data <- LEDs_data %>% dplyr::rename(year = year.x)

LEDs_data$wldownes <- ifelse(is.na(LEDs_data$wdldownes), 
                             NA, ifelse(LEDs_data$wdldownes == 2, 1, 0))
LEDs_data$wdldownes <-  as.factor(LEDs_data$wdldownes)
LEDs_data$wl <- ifelse(LEDs_data$wl == 0, 0,1)

########## IMpute Missing Data########
#Check Percentage of missing data for key variables 
pMiss <- function(x){sum(is.na(x))/length(x)*100}
pe_missing <- apply(LEDs_data,2,pMiss)
pe_missing
stargazer(LEDs_data)
mean(pe_missing)
#remove DVs
LEDs_data_dv <- LEDs_data %>% subset(select = c(year,
                                                COWcode,
                                                RSwarnumber,
                                                wl,
                                                wdldownes,
                                                wldownes,
                                                logLER))

LEDs_data_iv <- LEDs_data %>% subset(select = c(-wl,
                                                -wdldownes,
                                                -wldownes,
                                                -logLER))

#Impute Missing
pred <- quickpred(LEDs_data_iv,exc=c( "country_name",
                                      "histname", "COWcode", "year", 
                                      "actor_na" ))

imp_dat <- mice(LEDs_data_iv, m=20,maxit=50, meth='cart',pred, #try with 20 see if stable
                seed=500,remove_collinear = FALSE,)
summary(imp_dat)
t <- as.data.frame(imp_dat$loggedEvents)
LEDs_data_f<- complete(imp_dat) 
pe_missing <- apply(LEDs_data_f,2,pMiss)
pe_missing

LEDs_data_f <- left_join(LEDs_data_dv,LEDs_data_f, by = c("COWcode", "year",
                                                          "RSwarnumber"))

#####Create New Variables######
#Full Data
LEDs_data_f$v2x_corr_init <-LEDs_data_f$v2x_corr*LEDs_data_f$init
LEDs_data_f$v2x_corr_target <- LEDs_data_f$v2x_corr*LEDs_data_f$target

LEDs_data_f$e_gdppc_init <-LEDs_data_f$e_gdppc*LEDs_data_f$init 
LEDs_data_f$e_gdppc_target <- LEDs_data_f$e_gdppc*LEDs_data_f$target

LEDs_data_f$gdppc_growth_init <-LEDs_data_f$gdppc_growth*LEDs_data_f$init
LEDs_data_f$gdppc_growth_target <- LEDs_data_f$gdppc_growth*LEDs_data_f$target

LEDs_data_f3 <- LEDs_data_f %>% dplyr::select(RSwarnumber,
                                              init,
                                              target,
                                              v2x_corr_init,
                                              v2x_corr_target,
                                              e_gdppc_init,
                                              e_gdppc_target,
                                              gdppc_growth_init,
                                              gdppc_growth_target)

LEDs_data_f2 <-LEDs_data_f %>% group_by(RSwarnumber,init) %>% summarise(init_corr_mean = mean(v2x_corr_init),
                                                                        target_corr_mean = mean(v2x_corr_target),
                                                                        e_gdppc_init_corr_mean = mean(e_gdppc_init),
                                                                        e_gdppc_target_mean = mean(e_gdppc_target),
                                                                        gdppc_growth_init_mean = mean(gdppc_growth_init),
                                                                        gdppc_growth_target_mean = mean(gdppc_growth_target))

LEDs_data_f2 <-LEDs_data_f2 %>% group_by(RSwarnumber) %>% summarise(init_corr_mean = sum(init_corr_mean),
                                                                    target_corr_mean = sum(target_corr_mean),
                                                                    e_gdppc_init_corr_mean = sum(e_gdppc_init_corr_mean),
                                                                    e_gdppc_target_mean = sum(e_gdppc_target_mean),
                                                                    gdppc_growth_init_mean = sum(gdppc_growth_init_mean),
                                                                    gdppc_growth_target_mean = sum(gdppc_growth_target_mean))

LEDs_data_f<- left_join(LEDs_data_f, LEDs_data_f2, by = c("RSwarnumber"))

LEDs_data_f$opp_corr <- ifelse(LEDs_data_f$init==1, LEDs_data_f$target_corr_mean, LEDs_data_f$init_corr_mean)
LEDs_data_f$opp_gdppc <- ifelse(LEDs_data_f$init==1, LEDs_data_f$e_gdppc_target_mean, LEDs_data_f$e_gdppc_init_corr_mean)
LEDs_data_f$opp_gdppc_growth <- ifelse(LEDs_data_f$init==1, LEDs_data_f$gdppc_growth_target_mean, LEDs_data_f$gdppc_growth_init_mean)


LEDs_data_f$relative_corr <- LEDs_data_f$v2x_corr - LEDs_data_f$opp_corr
LEDs_data_f$relative_gdppc <- LEDs_data_f$e_gdppc - LEDs_data_f$opp_gdppc
LEDs_data_f$relative_gdppc_growth  <- LEDs_data_f$gdppc_growth - LEDs_data_f$opp_gdppc_growth

LEDs_data_f$v2x_libdem_init <-LEDs_data_f$v2x_libdem*LEDs_data_f$init
LEDs_data_f$v2x_libdem_target <- LEDs_data_f$v2x_libdem*LEDs_data_f$target
LEDs_data_f$relative_corr_init <-LEDs_data_f$relative_corr*LEDs_data_f$init
LEDs_data_f$relative_corr_target <- LEDs_data_f$relative_corr*LEDs_data_f$target

#STOP

#write.csv(LEDs_data_f, here::here("replication_data_afs.csv")) #remove initial # if re-imputing data.
LEDs_data_f <- read.csv(here::here("replication_data_afs.csv"))

#List Wise Deletion Data
LEDs_data$v2x_corr_init <-LEDs_data$v2x_corr*LEDs_data$init
LEDs_data$v2x_corr_target <- LEDs_data$v2x_corr*LEDs_data$target

LEDs_data$e_gdppc_init <-LEDs_data$e_gdppc*LEDs_data$init 
LEDs_data$e_gdppc_target <- LEDs_data$e_gdppc*LEDs_data$target

LEDs_data$gdppc_growth_init <-LEDs_data$gdppc_growth*LEDs_data$init
LEDs_data$gdppc_growth_target <- LEDs_data$gdppc_growth*LEDs_data$target


LEDs_data3 <- LEDs_data %>% dplyr::select(RSwarnumber,
                                          init,
                                          target)

LEDs_data2 <-LEDs_data %>% group_by(RSwarnumber,init) %>% summarise(init_corr_mean = mean(v2x_corr_init),
                                                                    target_corr_mean = mean(v2x_corr_target),
                                                                    e_gdppc_init_corr_mean = mean(e_gdppc_init),
                                                                    e_gdppc_target_mean = mean(e_gdppc_target),
                                                                    gdppc_growth_init_mean = mean(gdppc_growth_init),
                                                                    gdppc_growth_target_mean = mean(gdppc_growth_target))

LEDs_data2 <-LEDs_data2 %>% group_by(RSwarnumber) %>% summarise(init_corr_mean = sum(init_corr_mean),
                                                                target_corr_mean = sum(target_corr_mean),
                                                                e_gdppc_init_corr_mean = sum(e_gdppc_init_corr_mean),
                                                                e_gdppc_target_mean = sum(e_gdppc_target_mean),
                                                                gdppc_growth_init_mean = sum(gdppc_growth_init_mean),
                                                                gdppc_growth_target_mean = sum(gdppc_growth_target_mean))

LEDs_data<- left_join(LEDs_data, LEDs_data2, by = c("RSwarnumber"))

LEDs_data$opp_corr <- ifelse(LEDs_data$init==1, LEDs_data$target_corr_mean, LEDs_data$init_corr_mean)
LEDs_data$opp_gdppc <- ifelse(LEDs_data$init==1, LEDs_data$e_gdppc_target_mean, LEDs_data$e_gdppc_init_corr_mean)
LEDs_data$opp_gdppc_growth <- ifelse(LEDs_data$init==1, LEDs_data$gdppc_growth_target_mean, LEDs_data$gdppc_growth_init_mean)


LEDs_data$relative_corr <- LEDs_data$v2x_corr - LEDs_data$opp_corr
LEDs_data$relative_gdppc <- LEDs_data$e_gdppc - LEDs_data$opp_gdppc
LEDs_data$relative_gdppc_growth  <- LEDs_data$gdppc_growth - LEDs_data$opp_gdppc_growth


LEDs_data$v2x_libdem_init <-LEDs_data$v2x_libdem*LEDs_data$init
LEDs_data$v2x_libdem_target <- LEDs_data$v2x_libdem*LEDs_data$target
LEDs_data$relative_corr_init <-LEDs_data$relative_corr*LEDs_data$init
LEDs_data$relative_corr_target <- LEDs_data$relative_corr*LEDs_data$target

#write.csv(LEDs_data, here::here("replication_data_missingobs_afs.csv"))
LEDs_data_f_mo <- read.csv(here::here("replication_data_missingobs_afs.csv"))
